require(Seurat)
require(DT) # for datatable
require(SeuratWrappers) # for fastMNN
require(harmony) # for RunHarmony
require(reshape2) # for dcast
ImportTable <- function(file, header = T, row.names = 1, sep = ',', check.names = FALSE, ...){
data <- read.csv(file = file, header = header, row.names = row.names, sep = sep, check.names = check.names, ...)
return(data)
}
show_table <- function(df, rownames = T, filter="top", options = list(pageLength = 10, scrollX=T), ...){
if (ncol(df) > 20) {
df <- df[, 1:20]
message('Due to the column dim is > 20, it only shows the ')
}
datatable(df, rownames = rownames,
filter=filter, options = options, ... )
}
tax_name <- ImportTable(file = '../00.ProcessData/2021-04-12-allTaxonomySplitLevel-v1.0.1.tsv', sep = '\t')
tax_name$new_name <- gsub(pattern = 's__', replacement = '', x = tax_name$Specie) %>%
gsub('_', ' ', .) # due to Feature names of CreateSeuratObject cannot have underscores, replace with blanket space.
show_table(tax_name)
tax_df <- ImportTable(file = '../00.ProcessData/2021-04-12-ReAbun-Fungi-filter1802-v1.0.0.tsv', sep = '\t')
# tax_df <- ImportTable(file = '../00.ProcessData/2021-04-12-RawData-Fungi-filter1802-v1.0.0.tsv', sep = '\t')
rownames(tax_df) <- tax_name[rownames(tax_df), "new_name"] # use the short species names
show_table(tax_df)
## Due to the column dim is > 20, it only shows the
meta_df <- ImportTable(file = '../00.ProcessData/metaInfo-subgroup-v4.1.csv')
meta_df <- meta_df[colnames(tax_df),]
show_table(meta_df)
CreateSeuratObject: Create a Seurat object link
s_obj <- CreateSeuratObject(counts = tax_df, meta.data = meta_df)
# split the object by cohorts
s_list <- SplitObject(s_obj, split.by = 'Cohort')
Fast integration using reciprocal PCA (RPCA) link
for (i in 1:length(s_list)) {
s_list[[i]] <- NormalizeData(s_list[[i]], verbose = FALSE)
s_list[[i]] <- FindVariableFeatures(s_list[[i]], selection.method = "vst", nfeatures = 2000, verbose = FALSE)
}
s_anchors <- FindIntegrationAnchors(object.list = s_list, k.anchor = 20)
s_integrated <- IntegrateData(anchorset = s_anchors, k.weight = 10) %>%
ScaleData(. , verbose = FALSE) %>%
RunPCA(. , npcs = 30, verbose = FALSE) %>%
RunUMAP(. , reduction = "pca", dims = 1:30, verbose = FALSE)
prca_obj <- FindNeighbors(s_integrated, dims = 1:30) %>%
FindClusters()
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 1802
## Number of edges: 72471
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7382
## Number of communities: 8
## Elapsed time: 0 seconds
prca_table <- dcast(as.data.frame(table(prca_obj@active.ident,prca_obj@meta.data[["Stage"]])), Var1 ~ Var2)
rownames(prca_table) <- prca_table$Var1; prca_table <- prca_table[,-1]
show_table(prca_table)
DimPlot(prca_obj, group.by = c("Cohort", "Stage", 'ident'), ncol = 3)
fastMNN: Batch effects in single-cell RNA-sequencing data are corrected by matching mutual nearest neighbors link
Nature Biotechnology, 2018, DOI: 10.1038/nbt.4091
fMNN_obj <- NormalizeData(s_obj) %>%
FindVariableFeatures() %>%
SplitObject(. , split.by = 'Cohort') %>%
RunFastMNN()
fMNN_obj2 <- RunUMAP(fMNN_obj, reduction = 'mnn', dims = 1:30) %>%
FindNeighbors(. , reduction = 'mnn', dims = 1:30) %>%
FindClusters()
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 1802
## Number of edges: 76431
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.6830
## Number of communities: 6
## Elapsed time: 0 seconds
fMNN_table <- dcast(as.data.frame(table(fMNN_obj2@active.ident,fMNN_obj2@meta.data[["Stage"]])), Var1 ~ Var2)
rownames(fMNN_table) <- fMNN_table$Var1; fMNN_table <- fMNN_table[,-1]
show_table(fMNN_table)
DimPlot(fMNN_obj2, group.by = c("Cohort", "Stage", 'ident'), ncol = 3)
Fast, sensitive, and flexible integration of single cell data with Harmony. link
Nature Methods, 2019, DOI: 10.1038/s41592-019-0619-0
hmy_obj <- NormalizeData(s_obj) %>%
FindVariableFeatures() %>%
ScaleData() %>%
RunPCA(. , verbose = FALSE)
hmy_obj2 <- RunHarmony(object = hmy_obj, group.by.vars = 'Cohort') %>%
RunUMAP(., reduction = "harmony", dims = 1:30) %>%
FindNeighbors(., reduction = "harmony", dims = 1:30) %>%
FindClusters()
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 1802
## Number of edges: 78214
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7433
## Number of communities: 9
## Elapsed time: 0 seconds
hmy_table <- dcast(as.data.frame(table(hmy_obj2@active.ident,hmy_obj2@meta.data[["Stage"]])), Var1 ~ Var2)
rownames(hmy_table) <- hmy_table$Var1; hmy_table <- hmy_table[,-1]
show_table(hmy_table)
DimPlot(hmy_obj2, group.by = c("Cohort", "Stage", 'ident'), ncol = 3)